***********************************************************
* Internal migration and crime in Brazil *
* Author: Eva-Maria Egger 

* Contact: egger@wider.unu.edu
***********************************************************

* This do-file gets migration flows and IV using different distance cut-offs.

	*OUTPUTS: "MR_panel_ALL_FULLt_Dist.dta"
	
***********************************************************
* 1. : Origin-destination specific number of migrants over the period, weighted with the survey weights ***

use "CENSO 2010_ind", clear
merge m:1 micregion using "$data\MR_allcodes", nogen keep(1 3)

* Dummy for highly educated individuals.
* A relational expression is always 0/1 and missing values compare as larger
* than any number, so cond(educ_level>=3, 1, 0, .) never returned missing and
* coded missing educ_level as 1. The -if- qualifier leaves those cases missing.
g high = (educ_level>=3) if !missing(educ_level)

* keep only the variables that we need: origin, destination, year of migration
keep orig orig_mr migrant time_MC micregion uf_ori uf high iweight
* people whose origin microregion equals the current one are not migrants here
replace migrant=0 if orig_mr==micregion
drop if migrant==. | migrant==0
compress 

* attach the origin-destination distance
merge m:1 micregion orig_mr using "$data\MR_distancematrix", nogen keep(1 3)
* individual-level migrant file; re-loaded by the later sections of this do-file
save "$data/indiv", replace

//plot distance moved
histogram distance if migrant==1 & time_MC>=6 & time_MC<=9 , percent xtitle(Distance moved in kilometers)
	graph export "$graphs/Distances_moved.tif", replace

* For every distance cut-off d (100, 200, ..., 2000): flag migrants with
* time_MC between 6 and 9 who moved at least d. The variable names are
* collected in a local so the collapse list cannot drift from the loop.
local migvars
forv d=100(100)2000{
	g migD`d'all = (migrant==1 & inrange(time_MC, 6, 9) & distance>=`d' & distance!=.)
	local migvars `migvars' migD`d'all
}

* weighted number of migrants per destination-origin pair
collapse (sum) `migvars' [pw=iweight], by(micregion orig_mr)

	* re-attach the microregion codes dropped by the collapse
	merge m:1 micregion using "$data\MR_allcodes", nogen keep(1 3)
drop region uf
	compress
save "$data\ODmig_Dist", replace

****************************************************************************
*** 2. : total number of out-migrants per origin, pooled over the period

use "$data/indiv", clear

* Out-migration seen from the origin: for each distance cut-off, flag
* migrants with time_MC between 6 and 9 who moved at least that far.
local outflows
forvalues km = 100(100)2000 {
	generate outD`km'all = cond(inrange(time_MC, 6, 9) & migrant==1 & distance!=. & distance>=`km', 1, 0, .)
	local outflows `outflows' outD`km'all
}

* weighted sums by origin microregion
collapse (sum) `outflows' [pw=iweight], by(orig_mr)
tempfile origin_totals
save `origin_totals'

* population file, re-keyed so that it matches on the origin identifier
use "$data\MR_population_allyears", clear
merge 1:1 micregion using "$data\MR_allcodes", nogen keep(3)
rename micregion orig_mr
drop region uf
merge 1:1 orig_mr using `origin_totals', nogen keep(3)

* out-migration rates per 100,000 inhabitants, using population_2000
foreach flow of local outflows {
	generate r_`flow' = ((100000/population_2000)*`flow')
}
drop population*
compress
save "$data\Outmig_Dist", replace


****************************************************************************
*** 3. : total number of in-migrants per destination, pooled over the period

use "$data/indiv", clear

* In-migration seen from the destination: for each distance cut-off, flag
* migrants with time_MC between 6 and 9 who moved at least that far
* (same window as the out-migration counts).
local inflows
forvalues km = 100(100)2000 {
	generate inD`km'all = cond(inrange(time_MC, 6, 9) & migrant==1 & distance!=. & distance>=`km', 1, 0, .)
	local inflows `inflows' inD`km'all
}

* weighted sums by destination microregion
collapse (sum) `inflows' [pw=iweight], by(micregion)
tempfile dest_totals
save `dest_totals'

* attach population and microregion codes
use "$data\MR_population_allyears", clear
merge 1:1 micregion using "$data\MR_allcodes", nogen keep(3)
merge 1:1 micregion using `dest_totals', nogen keep(3)

* in-migration rates per 100,000 inhabitants, using population_2000
foreach flow of local inflows {
	generate r_`flow' = ((100000/population_2000)*`flow')
}
drop region uf population*
compress
save "$data\Immig_Dist", replace

****************************************************************************
*** 4. : combine the pair-level flows with the origin and destination totals
use "$data\ODmig_Dist", clear
merge m:1 orig_mr using "$data\Outmig_Dist", nogen keep(1 3)
merge m:1 micregion using "$data\Immig_Dist", nogen keep(1 3)

*** 5. : share of each origin's out-migrants that went to each destination
local shares
forvalues km = 100(100)2000 {
	generate p_out`km'all = (migD`km'all/outD`km'all)
	local shares `shares' p_out`km'all
}

* only the shares are kept; they serve as weights further down
keep micregion orig_mr `shares'

save "$data\Migrationshares_Distance_FULLt", replace

*************************************
*** Annual migration rates
use "$data\indiv", clear

* OUT-migration by origin: one indicator per distance cut-off and per
* value of time_MC from 0 to 5
forvalues km = 100(100)2000 {
	forvalues lag = 0/5 {
		generate outD`km'`lag' = cond(migrant==1 & time_MC==`lag' & distance>=`km' & distance!=., 1, 0, .)
	}
}

* weighted counts per origin, re-keyed as micregion for the merges below
collapse (sum) outD* [pw=iweight], by(orig_mr)
rename orig_mr micregion
sort micregion

* give the variables a year suffix: time_MC 0,1,...,5 -> 2009,2008,...,2004
forvalues km = 100(100)2000 {
	forvalues lag = 0/5 {
		local yr = 2009 - `lag'
		rename outD`km'`lag' outD`km'_`yr'
	}
}

tempfile out_counts
save `out_counts', replace

* out-migration rates per 100,000 inhabitants of the same year
use "$data\MR_population_allyears", clear
merge 1:1 micregion using `out_counts', nogen keep(3)

forvalues km = 100(100)2000 {
	forvalues yr = 2004/2009 {
		generate r_outD`km'_`yr' = ((100000/population_`yr')*outD`km'_`yr')
	}
}

tempfile out_rates
save `out_rates', replace

use "$data\indiv", clear

* IN-migration by destination: same indicators, counted at the destination
forvalues km = 100(100)2000 {
	forvalues lag = 0/5 {
		generate inD`km'`lag' = cond(migrant==1 & time_MC==`lag' & distance>=`km' & distance!=., 1, 0, .)
	}
}

* weighted counts per destination
collapse (sum) inD* [pw=iweight], by(micregion)
sort micregion

* bring in the annual outflows (and the population columns saved with them)
merge m:1 micregion using `out_rates', nogen keep(1 3)

* year suffix, same mapping as for the outflows
forvalues km = 100(100)2000 {
	forvalues lag = 0/5 {
		local yr = 2009 - `lag'
		rename inD`km'`lag' inD`km'_`yr'
	}
}

* in-migration rates per 100,000 inhabitants of the same year
forvalues km = 100(100)2000 {
	forvalues yr = 2004/2009 {
		generate r_inD`km'_`yr' = ((100000/population_`yr')*inD`km'_`yr')
	}
}

compress

save "$data\MR_annualMigration_Distance_FULLt", replace
	
****************************************************************************

// Push instrument for OUT-migration:
// origin-level shocks (MIVE<year>) weighted by the origin-destination shares
use "$data\Migrationshares_Distance_FULLt", clear
merge m:1 micregion using "$data\MR_annualMigration_Distance_FULLt", nogen keep(1 3)
merge m:1 micregion using "$data\MR_Manufacturing_03-10", nogen keep(3)
merge m:1 orig_mr using "$data\MR_MIV_origin", nogen keep(3)
merge m:1 micregion using "$data\MR_allcodes", nogen keep(3)

// IV1: Kleemans/Magruder weighting - Bartik demand shock.
// For every cut-off and year, sum share x shock over all rows of a micregion.
forvalues km = 100(100)2000 {
	forvalues yr = 2004/2009 {
		generate iv_outD`km'_`yr' = p_out`km'all * MIVE`yr'
		bysort micregion: egen MIVE_IND`km'_`yr' = total(iv_outD`km'_`yr')
		drop iv_outD`km'_`yr'
	}
}

drop MIV20* MIVE20* F*

* one row per micregion (the instruments are constant within micregion)
keep micregion MIVE_*
collapse MIVE_*, by(micregion)

* stub names for the reshape, in the same order as the cut-offs
local ivstubs
forvalues km = 100(100)2000 {
	local ivstubs `ivstubs' MIVE_IND`km'_
}

reshape long `ivstubs', i(micregion) j(year)
xtset micregion year

* system-missing instruments are set to zero
foreach stub of local ivstubs {
	replace `stub' = 0 if `stub' == .
}

compress
save "$data\PUSH-IV_distance_panel", replace


***********************************************

** other microregion-level information: all population, not just working
use "D:\data\Brazil\Censo\Censo 2010\CENSO 2010_ind", clear
merge m:1 micregion using "$data\MR_allcodes", nogen keep(1 3)

* high-educated dummy (educ_level>=3). Missing values compare as larger than
* any number, so without the -if- a missing educ_level would be coded high=1.
g high=(educ_level>=3) if !missing(educ_level)
g LF = (work_active==1) // active labour force participants
g ymale = (sex==0 & age>=16 & age<=25 & high==0) //low-educated young male

* weighted microregion means over the whole population
collapse LF ymale high nonwhite youth dropouts rent_v [pw=iweight], by(micregion)
tempfile lf
save `lf'

use "D:\data\Brazil\Censo\Censo 2010\CENSO 2010_ind", clear //only working active population
merge m:1 micregion using "$data\MR_allcodes", nogen keep(1 3)
drop if work_active==0
* high and low skilled workers; same definition and missing-value guard as above
g high=(educ_level>=3) if !missing(educ_level)

*gen dummies for variables that shall be aggregated to microregion level, =1 for category we are interested in
g unemployed=(work_emp_main==0)
//g LF=(age>=16 & age<=60)
g public=(informal==1) //formal public job
replace informal=0 if informal==1
* informality (including no card, self-employed, small business <5).
* The !missing() guard matters: "informal>=2" is also true for missing values,
* which would otherwise turn every missing informal into informal==1.
replace informal=1 if informal>=2 & !missing(informal)
g agriculture=(activ_group==1) //(agriculture)
g lwageH=lwage_ if high==1 //wage for high skilled
g lwageL=lwage_ if high==0 //wage for low skilled
g agrilwage=lwage_ if activ_group==1
g police = (occu_group==10) //if in police force

rename area urban
* weighted microregion means over the working-active population
collapse (mean) region uf police unemployed informal public agriculture urban ///
				lwage_ lwageH lwageL agrilwage [pw=iweight]  , by(micregion)
sort micregion			
merge 1:1 micregion using `lf', nogen keep(3)
rename lwage_ lwage
	* mark variables with year ending so they can be reshaped on year later
foreach v in lwageL lwageH lwage agrilwage rent_v high ymale urban ///
		 informal unemployed agriculture public nonwhite youth dropouts LF police{
	rename `v' `v'_2010
}
	* attach the geographic coordinates and drop the helper columns that come with them
merge m:1 micregion using "$data\MR_coordinates.dta", keep(3) nogen
drop id MR pop_size

	* attach homicides, population and the annual migration rates
merge m:1 micregion using "$data\MR_homicides_00-10.dta", keep(3) nogen
merge m:1 micregion using "$data\MR_population_allyears", keep(3) nogen
merge m:1 micregion using "$data\MR_annualMigration_Distance_FULLt", keep(3) nogen

sort micregion

** reshape into a micregion-year panel
* stubs of the census covariates (suffixed _2010 before this point)
local census_stubs police_ unemployed_ informal_ public_ agriculture_ urban_ lwage_ lwageH_ lwageL_ agrilwage_ LF_ ymale_ high_ ///
		nonwhite_ youth_ dropouts_ rent_v_
* stubs of the in-migration rates, one per distance cut-off
local inrate_stubs
forvalues km = 100(100)2000 {
	local inrate_stubs `inrate_stubs' r_inD`km'_
}

reshape long `census_stubs' homicrate_ homicides_ population_ `inrate_stubs', i(micregion) j(year)

xtset micregion year

* Baseline homicide rate: the year-2000 value of homicrate_, copied to every
* row of the microregion. Selecting on year==2000 explicitly avoids relying on
* the within-group row order and on 2000 being the first year in the panel.
egen float homic2000_ = max(cond(year==2000, homicrate_, .)), by(micregion)
sort micregion year
replace homic2000_ = 0 if homic2000_ ==.

drop if year==2000
* year variable for the year dummies; set to missing for years up to 2004
* NOTE(review): the original comment said 2006 is kept only for the IV, but the cut-off coded here is 2004 - confirm
g DYear = year
replace DYear = . if year<=2004 

*merge with IV data
merge 1:1 micregion year using "$data\PUSH-IV_distance_panel", nogen keep(1 3) //PUSH IVs
merge 1:1 micregion year using "$data\MR_RAIS_02-10", nogen keep(1 3) //RAIS employment data
merge 1:1 micregion year using "$data\MR_PUSH-IV_weather_panel", nogen keep(1 3) //weather IV

*generate real wages: log wage minus log rent
g lrent_v_ = log(rent_v_)
foreach w in lwage_ lwageH_ lwageL_{
	g r`w' = (`w' - lrent_v_)
}

** generate log of variables (zeros become missing):
foreach v in homicrate_ population_ r_inD100_ r_inD200_ r_inD300_ r_inD400_ r_inD500_ ///
		r_inD600_ r_inD700_ r_inD800_ r_inD900_ r_inD1000_ r_inD1100_ r_inD1200_ r_inD1300_ r_inD1400_ r_inD1500_ r_inD1600_ ///
		r_inD1700_ r_inD1800_ r_inD1900_ r_inD2000_{  
	g l_`v' = log(`v')
}

g lwagef_ = log(averagemonthlywage) //formal sector wages
g lemp_ = log(totaljobs) // employment in formal sector

** log in-migration rate with zero rates replaced by a uniform(0,1) draw,
** so that the log is defined. The seed is fixed so that re-running the
** do-file reproduces the same draws.
set seed 12345
forv v=100(100)2000{
	g IN_ = r_inD`v'_
	replace IN_ = runiform() if r_inD`v'_==0
	g l_IN`v'_ = log(IN_)
	drop IN_
}

* park the panel built so far while the sector data are prepared
tempfile census_panel
save `census_panel'

// sector-share variables from RAIS, reshaped to one row per micregion-year
use "$data\MR_Manufacturing_03-10", clear

keep micregion MRavgwage_growth* MRavgjobs_growth* Mavgwage_growth* Mavgjobs_growth* MIV* MIVE* MRIV* MRIVE* Mempshare* 

reshape long MRavgwage_growth MRavgjobs_growth Mavgwage_growth Mavgjobs_growth MIV MIVE MRIV MRIVE Mempshare , i(micregion) j(year)

* The original saved this data to a tempfile and merged it 1:1 with itself,
* which changes nothing; that step is removed. The sector data are the master
* here, and keep(2 3) retains every row of the parked panel.
merge 1:1 micregion year using `census_panel', nogen keep(2 3)

* instruments without a value are set to zero
foreach v in MIV MIVE MRIV MRIVE {  
	recode `v' .=0 
} 

compress

* suffix C on the census-based variables
* NOTE(review): presumably to avoid clashing with same-named PNAD variables merged afterwards - confirm
foreach var in police_ unemployed_ public_ agriculture_ urban_ lwage_ lwageL_ agrilwage_ LF_ ymale_ high_ nonwhite_ youth_ dropouts_ rent_v_ informal_{
	ren `var' `var'C
}
* lwageL_ was just renamed to lwageL_C (lwageH_ was not); the new name is
* spelled out so the line does not depend on variable abbreviation being on
g inequalityC  = (lwageH_/lwageL_C)

* park the census/RAIS panel while the PNAD data are aggregated
tempfile census_rais
save `census_rais'

* PNAD data: attach microregion codes, then average by micregion and year
use "$data\PNAD_2001-09.dta", clear
merge m:1 municip using "$data\MC_allcodes.dta", nogen keep(3)

local pnad_vars rent_md active_ unemployed_ agriculture_ construction_ manufac_ manuwage_ lwage_ lhwage_ lhwageH_ lhwageL_ highe_ highs_ young_ youngUE_ formal_ emp specsum_ PNAD
collapse (mean) `pnad_vars' , by(micregion year)

* combine with the parked panel; _merge is kept in the final file
merge 1:1 micregion year using `census_rais'

* PNAD-based ratio of the high to the low hourly-wage variable
g inequality  = (lhwageH_/lhwageL_)

compress
save "$data\MR_panel_ALL_FULLt_Dist", replace
**********************************************************

*done*
